SplitWithOverlap
沿指定轴(axis)将输入张量切分为多个输出张量。与标准 Split 不同,该算子允许通过 start_indices 和 end_indices 自定义每个输出块的起始和结束位置,从而支持输出块之间的重叠(Overlap)。
\[\text{对于第 } j \text{ 个输出张量,其在 axis 轴上的第 } k \text{ 个元素对应:}\]
\[Output[j]_{(\dots, k, \dots)} = Input_{(\dots, start\_indices[j] + k, \dots)} \quad \text{其中 } 0 \le k < (end\_indices[j] - start\_indices[j])\]
- 输入:
input - 输入张量数据地址。
outputs - 输出张量地址数组(指针数组)。
axis - 进行切分的轴索引。
input_shape - 输入张量的形状数组。
input_ndim - 输入张量的维度数(即 input_shape 数组的长度)。
num_split - 输出张量的数量。
start_indices - 每个输出张量在切分轴上的起始索引数组。
end_indices - 每个输出张量在切分轴上的结束索引数组(不包含该索引,即第 j 个输出取区间 [start_indices[j], end_indices[j]))。
core_mask(int) - 核掩码。仅共享存储版本(_s 后缀接口)带有此参数,私有存储版本(_p 后缀接口)无此参数。
- 输出:
outputs - 各个输出张量中填充了切分后的数据。
- 支持平台:
FT78NE、MT7004
备注
FT78NE 支持 int8, int16, int32, fp32, fp64, cplx64, cplx128
MT7004 支持 fp16, fp32, int16, int32, cplx64
算子支持不连续切分或有重叠的切分。
每个输出张量在非切分轴上的维度与输入张量保持一致。
共享存储版本:
-
void i8_split_with_overlap_s(int8_t *input, int8_t *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices, int core_mask)
-
void i16_split_with_overlap_s(int16_t *input, int16_t *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices, int core_mask)
-
void i32_split_with_overlap_s(int32_t *input, int32_t *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices, int core_mask)
-
void hp_split_with_overlap_s(half *input, half *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices, int core_mask)
-
void fp_split_with_overlap_s(float *input, float *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices, int core_mask)
-
void dp_split_with_overlap_s(double *input, double *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices, int core_mask)
-
void c64_split_with_overlap_s(float *input, float *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices, int core_mask)
-
void c128_split_with_overlap_s(double *input, double *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices, int core_mask)
C调用示例:
1//FT78NE示例(共享存储) 2#include "78NE/utils.h" 3 4int main() { 5 float *input = (float *)0xA0000000; 6 float *out0 = (float *)0xB0000000; 7 float *out1 = (float *)0xB1000000; 8 float *outputs[] = {out0, out1}; 9 int input_shape[] = {8, 200, 10}; 10 int start_indices[] = {0, 150}; 11 int end_indices[] = {80, 200}; 12 int core_mask = 0xFF; 13 14 fp_split_with_overlap_s(input, outputs, 1, input_shape, 3, 2, start_indices, end_indices, core_mask); 15 return 0; 16}
私有存储版本:
-
void i8_split_with_overlap_p(int8_t *input, int8_t *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices)
-
void i16_split_with_overlap_p(int16_t *input, int16_t *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices)
-
void i32_split_with_overlap_p(int32_t *input, int32_t *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices)
-
void hp_split_with_overlap_p(half *input, half *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices)
-
void fp_split_with_overlap_p(float *input, float *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices)
-
void dp_split_with_overlap_p(double *input, double *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices)
-
void c64_split_with_overlap_p(float *input, float *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices)
-
void c128_split_with_overlap_p(double *input, double *outputs[], int axis, int *input_shape, int input_ndim, int num_split, int *start_indices, int *end_indices)
C调用示例:
// MT7004 example (private-memory version)
#include <stdio.h>
// NOTE(review): no header declaring fp_split_with_overlap_p is included in this
// example; add the MT7004 operator library header here (name to be confirmed).

int main() {
    // Input and output buffers are referenced through hard-coded addresses.
    float *input = (float *)0x10810000;
    float *out0 = (float *)0x10820000;
    float *outputs[] = {out0};
    int input_shape[] = {4, 10, 5};
    // The single output takes indices [0, 5) along the split axis.
    int start_idx[] = {0};
    int end_idx[] = {5};

    // axis = 1, input_ndim = 3, num_split = 1
    fp_split_with_overlap_p(input, outputs, 1, input_shape, 3, 1, start_idx, end_idx);
    return 0;
}